Download data
library(readxl)
# Dowload file
download.file("http://www.apradie.com/datos/datamx2020q4.xlsx", "dataw7.xlsx", mode="wb")
dataset <- read_excel("dataw7.xlsx")
# Download market data
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
getSymbols("^MXX", from="2000-01-01", to="2019-12-31", periodicity="monthly", src="yahoo")
## 'getSymbols' currently uses auto.assign=TRUE by default, but will
## use auto.assign=FALSE in 0.5-0. You will still be able to use
## 'loadSymbols' to automatically load data. getOption("getSymbols.env")
## and getOption("getSymbols.auto.assign") will still be checked for
## alternate defaults.
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.
## [1] "^MXX"
Merge the market returns
# Collapse monthly data to quarterly data
QMXX <- to.quarterly(MXX, indexAt = 'startof')
# Assign the Adjusted price column to QMXX
QMXX <- Ad(QMXX)
# Change column name
colnames(QMXX) <- c("IPC")
# Obtain market cc returns
QMXX$IPCrets <- diff(log(QMXX))
# Create a data frame object from QMXX
QMXX.df <- data.frame(quarter=index(QMXX), coredata(QMXX))
# Turn the common column to the same type
dataset$quarter <- as.Date(dataset$quarter)
# Many-to-1 merge
dataset <- merge(dataset, QMXX.df, by="quarter")
# Change type of the data frame into panel data
library(plm)
paneldata <- pdata.frame(dataset, index = c("firmcode", "quarter"))
Calculation and windsorization of Book to market ratio
# Keep only active firms
paneldata <- paneldata[paneldata$status == "active", ]
# Calculate bmr
paneldata$bookvalue <- paneldata$totalassets - paneldata$totalliabilities
paneldata$marketvalue<-paneldata$originalhistoricalstockprice*paneldata$sharesoutstanding
paneldata$bmr <- paneldata$bookvalue / paneldata$marketvalue
# Winsorize bmr with stataR
# We will use the winsorize function from the statar package.
# This function can work with panel data (the previous function from the robustHD
# package cannot)
# You have to specifiy the minimum and the maximum percentile
library(statar)
paneldata$bmr_w <- winsorize(paneldata$bmr, probs = c(0.02,0.98))
## 1.20 % observations replaced at the bottom
## 1.20 % observations replaced at the top
Predictions with the predict lm function
# 1. The earnings per share is the calculation of the earnings or profit divided by the number of shares of its stock. The resulting number serves as an indicator of a company's profitability.
# 2. The earnings per share per price, is a way of valuing a company that measures its price with the earnings per share. It is used to determine the relative value of the company's share.
# 3.Calculation of the EPS
paneldata$eps <- paneldata$ebit / paneldata$sharesoutstanding
#Calculate epsp
paneldata$epsp <- paneldata$eps / paneldata$originalhistoricalstockprice
# Winsorize epsp
paneldata$epsp_w <- winsorize(paneldata$epsp, probs = c(0.02,0.98))
## 0.88 % observations replaced at the bottom
## 0.88 % observations replaced at the top
# Calculate cc returns for all returns
paneldata$stockreturn <- diff(log(paneldata$adjustedstockprice))
lastq <- as.data.frame(paneldata[(paneldata$quarter=="2019-10-01"),])
# Construction of regression model
reg1 <- lm(stockreturn ~ epsp_w + bmr_w, data = lastq)
s_reg1 <- summary(reg1)
s_reg1
##
## Call:
## lm(formula = stockreturn ~ epsp_w + bmr_w, data = lastq)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.42027 -0.07916 -0.01485 0.07058 0.62443
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.05782 0.02709 2.134 0.0351 *
## epsp_w -0.01946 0.14011 -0.139 0.8898
## bmr_w -0.03106 0.01915 -1.622 0.1077
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1584 on 110 degrees of freedom
## (39 observations deleted due to missingness)
## Multiple R-squared: 0.02496, Adjusted R-squared: 0.007232
## F-statistic: 1.408 on 2 and 110 DF, p-value: 0.249
manual_pred = 0.05* s_reg1$coefficients[2,1]+ 0.8*s_reg1$coefficients[3,1]+ s_reg1$coefficients[1,1]
manual_pred
## [1] 0.03199618
new_x <- data.frame(epsp_w=c(0.05), bmr_w=c(0.8))
predict.lm(reg1, newdata = new_x)
## 1
## 0.03199618
Confidence interval 0.95
pr_reg1 <- predict.lm(reg1, new_x, interval = "confidence")
pr_reg1
## fit lwr upr
## 1 0.03199618 -0.002305707 0.06629807
Yes it got the same prediction than manually.
Q: Interpret the 95% confidence interval for the prediction. A: Given the linear regression coefficients that where calculated it can be said that if the book to market ratio is 0.8 and the earnings per share per price is 0.05 the there is 95% confidence that the value of the stock return is between -0.00230 and 0.06629 which means that there is no enough statistical evidence that given the values of espp and bmr the stock price will be positive.
# Join both objects
pr_reg1.df <- cbind(new_x, pr_reg1)
pr_reg1.df
# Construct the model
reg2 <- lm(stockreturn ~ IPCrets + bmr_w, data = paneldata)
s_reg2 <- summary(reg2)
s_reg2
##
## Call:
## lm(formula = stockreturn ~ IPCrets + bmr_w, data = paneldata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.26380 -0.07630 -0.00448 0.07449 1.56551
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.032240 0.002972 10.85 <2e-16 ***
## IPCrets 0.830298 0.023419 35.45 <2e-16 ***
## bmr_w -0.032087 0.002315 -13.86 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1675 on 7096 degrees of freedom
## (5061 observations deleted due to missingness)
## Multiple R-squared: 0.1712, Adjusted R-squared: 0.171
## F-statistic: 733.1 on 2 and 7096 DF, p-value: < 2.2e-16
95% confidence
new_x2 <- data.frame(bmr_w=0.8, IPCrets = 1)
pr_reg2 <- predict.lm(reg2, new_x2, interval = "confidence")
pr_reg2
## fit lwr upr
## 1 0.8368685 0.7916157 0.8821213
Join both
pr_reg2.df <- cbind(new_x2, pr_reg2)
pr_reg2.df
If the market return is 100% and the BMR is 0.8 then it can be said with a 95% confidence that the stock return could go between 79.1615% and 88.2121%. The prediction is 83.6868% for those values.
Prediction
new_x2b <- data.frame(IPCrets=seq(from=-0.02, to=0.02, by=0.01), bmr_w=1)
pr_reg2b <- predict.lm(reg2, new_x2b, interval = "confidence")
pr_reg2b
## fit lwr upr
## 1 -0.0164523682 -0.020726696 -0.012178040
## 2 -0.0081493921 -0.012259322 -0.004039462
## 3 0.0001535839 -0.003838245 0.004145413
## 4 0.0084565599 0.004532355 0.012380765
## 5 0.0167595360 0.012849856 0.020669215
pr_reg2b.df <- cbind(new_x2b, pr_reg2b)
colnames(pr_reg2b.df) <- c("IPCrets", "bmr_w", "Stockreturn", "lwr", "upr")
pr_reg2b.df
library(ggplot2)
ggplot(pr_reg2b.df, aes(x = IPCrets, y=Stockreturn))+
geom_point(size = 2) + geom_line() +
geom_errorbar(aes(ymax = upr, ymin=lwr))
library(prediction)
prediction(reg2, at = list(IPCrets=seq(from=-0.02, to=0.02, by=0.01), bmr_w=1))
Interpretation: From the output data it can be seen that when the BMR is constantly 1 and the market returns goes from -0.02 to 0.02 by a rate of 0.01 the stock return increases linearly, the stock return 95% confidence interval is spaced consistently between each of the stock returns predicted.
Use plm to do the linear regression model
# Load plm
library(plm)
# Use the lag() function with -1 indicating to go forward 1 period
model1 <- plm(lag(stockreturn, -1) ~ IPCrets + bmr_w, data=paneldata, model="pooling")
s_model1 <- summary(model1)
s_model1
## Pooling Model
##
## Call:
## plm(formula = lag(stockreturn, -1) ~ IPCrets + bmr_w, data = paneldata,
## model = "pooling")
##
## Unbalanced Panel: n = 146, T = 1-78, N = 7039
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -1.3348414 -0.0774005 -0.0027878 0.0818477 1.4644899
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 0.0017384 0.0032410 0.5364 0.5917
## IPCrets 0.2748376 0.0253327 10.8491 < 2.2e-16 ***
## bmr_w 0.0114935 0.0025282 4.5461 5.557e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 237.61
## Residual Sum of Squares: 233.09
## R-Squared: 0.019058
## Adj. R-Squared: 0.018779
## F-statistic: 68.3469 on 2 and 7036 DF, p-value: < 2.22e-16
Can be constructed also like this :
model1a<-plm(stockreturn ~ lag(IPCrets) + lag(bmr_w), data = paneldata, model="pooling")
summary(model1a)
## Pooling Model
##
## Call:
## plm(formula = stockreturn ~ lag(IPCrets) + lag(bmr_w), data = paneldata,
## model = "pooling")
##
## Unbalanced Panel: n = 146, T = 1-78, N = 7039
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -1.3348414 -0.0774005 -0.0027878 0.0818477 1.4644899
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 0.0017384 0.0032410 0.5364 0.5917
## lag(IPCrets) 0.2748376 0.0253327 10.8491 < 2.2e-16 ***
## lag(bmr_w) 0.0114935 0.0025282 4.5461 5.557e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 237.61
## Residual Sum of Squares: 233.09
## R-Squared: 0.019058
## Adj. R-Squared: 0.018779
## F-statistic: 68.3469 on 2 and 7036 DF, p-value: < 2.22e-16
When both values, IPC and BMR are equal to 0 then the stock return of the following day will be 0.0017384 but there is no statistical evidence to state that it will be negative because the t value is 0.5364. After considering the effect of the BMR it can be said that a change by one of the IPCreturns it will change the stock return by 0.2748376. After considering the effect of the IPCrets when the BMR increases by 1 the stock return increases by 0.0114935.
model2 <- plm(lag(stockreturn, -4) ~ bmr_w + epsp_w, data = paneldata, model="pooling")
s_model2 <- summary(model2)
s_model2
## Pooling Model
##
## Call:
## plm(formula = lag(stockreturn, -4) ~ bmr_w + epsp_w, data = paneldata,
## model = "pooling")
##
## Unbalanced Panel: n = 126, T = 1-76, N = 4756
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -1.3128451 -0.0740470 -0.0028452 0.0789605 1.4889600
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## (Intercept) 0.0019973 0.0041118 0.4857 0.62717
## bmr_w 0.0089931 0.0035967 2.5004 0.01244 *
## epsp_w -0.0066547 0.0343372 -0.1938 0.84634
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 157.48
## Residual Sum of Squares: 157.26
## R-Squared: 0.0013965
## Adj. R-Squared: 0.00097633
## F-statistic: 3.3235 on 2 and 4753 DF, p-value: 0.03611
When the BMR and EPSP are 0 then the stock returns will be approximately 0.0019973. After considering the effect of the epsp there is enough statistical evidence to say that there is a positive relation between the bmr and the stock returns of one year after. After considering the effect of the bmr there is not enough statistical evidence that the relation between the epsp is negative which means that in average the relation is negative but it can also be positive.
newx_model2 <- data.frame(bmr_w = seq(from=0.6, to=1.6, by=0.1), epsp_w=mean(paneldata$epsp_w, na.rm=TRUE))
pr1_model2 <- prediction_summary(model=model2, at=newx_model2,level=0.95)
colnames(pr1_model2) <- c("bmr_w","epsp_w", "Predicted_return")
var_b0 <- s_model2$coefficients[1,2]^2
var_b1 <- s_model2$coefficients[2,2]^2
var_b2 <- s_model2$coefficients[3,2]^2
cov_coeff <- cov(matrix(c(s_model2$coefficients[1,1], s_model2$coefficients[2,1],s_model2$coefficients[3,1])))
pr1_model2$SE <- sqrt(var_b0 + pr1_model2$bmr_w^2*var_b1 + pr1_model2$epsp_w^2*var_b2 + 2*cov_coeff)
## Warning in var_b0 + pr1_model2$bmr_w^2 * var_b1 + pr1_model2$epsp_w^2 * : Recycling array of length 1 in vector-array arithmetic is deprecated.
## Use c() or as.vector() instead.
pr1_model2$lwr <- pr1_model2$Predicted_return - 2*pr1_model2$SE
pr1_model2$upr <- pr1_model2$Predicted_return + 2*pr1_model2$SE
pr1_model2
ggplot(pr1_model2, aes(x = bmr_w, y=Predicted_return))+
geom_point(size = 2) + geom_line() +
geom_errorbar(aes(ymax = upr, ymin=lwr))
The increase of 0.1 in the BMRw doesn’t affect that much the stock return predicted which means that the coefficient of BMRw is not very high and can be seen in the slope of the graph. The 95% confidence interval covers most of the predicted returns and goes from negative to positive returns.